/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */

package edu.uci.ics.crawler4j.crawler;

import java.util.ArrayList;
import java.util.List;

/**
 * Holds all configurable settings of a crawl: storage location, politeness,
 * connection limits, timeouts, proxy settings and authentication data.
 * Call {@link #validate()} before starting a crawl to catch invalid values early.
 */
public class CrawlConfig
{

    /**
     * The folder which will be used by crawler for storing the intermediate crawl data. The content
     * of this folder should not be modified manually.
     */
    private String crawlStorageFolder;

    /**
     * If this feature is enabled, you would be able to resume a previously stopped/crashed crawl.
     * However, it makes crawling slightly slower
     */
    private boolean resumableCrawling = false;

    /**
     * Maximum depth of crawling For unlimited depth this parameter should be set to -1
     */
    private int maxDepthOfCrawling = -1;

    /**
     * Maximum number of pages to fetch For unlimited number of pages, this parameter should be set
     * to -1
     */
    private int maxPagesToFetch = -1;

    /**
     * user-agent string that is used for representing your crawler to web servers. See
     * http://en.wikipedia.org/wiki/User_agent for more details
     */
    private String userAgentString = "crawler4j (http://code.google.com/p/crawler4j/)";

    /**
     * Politeness delay in milliseconds (delay between sending two requests to the same host).
     */
    private int politenessDelay = 200;

    /**
     * Should we also crawl https pages?
     */
    private boolean includeHttpsPages = false;

    /**
     * Should we fetch binary content such as images, audio, ...?
     */
    private boolean includeBinaryContentInCrawling = false;

    /**
     * Maximum Connections per host
     */
    private int maxConnectionsPerHost = 100;

    /**
     * Maximum total connections
     */
    private int maxTotalConnections = 100;

    /**
     * Socket timeout in milliseconds
     */
    private int socketTimeout = 20000;

    /**
     * Connection timeout in milliseconds
     */
    private int connectionTimeout = 30000;

    /**
     * Max number of outgoing links which are processed from a page
     */
    private int maxOutgoingLinksToFollow = 5000;

    /**
     * Max allowed size of a page. Pages larger than this size will not be fetched.
     */
    private int maxDownloadSize = 1048576;

    /**
     * Should we follow redirects?
     */
    private boolean followRedirects = true;

    /**
     * If crawler should run behind a proxy, this parameter can be used for specifying the proxy
     * host.
     */
    private String proxyHost = null;

    /**
     * If crawler should run behind a proxy, this parameter can be used for specifying the proxy
     * port.
     */
    private int proxyPort = 80;

    /**
     * If crawler should run behind a proxy and user/pass is needed for authentication in proxy,
     * this parameter can be used for specifying the username.
     */
    private String proxyUsername = null;

    /**
     * If crawler should run behind a proxy and user/pass is needed for authentication in proxy,
     * this parameter can be used for specifying the password.
     */
    private String proxyPassword = null;

    /**
     * List of possible authentications needed by crawling
     */
    private List<AuthInfo> authInfo;

    public CrawlConfig()
    {
    }

    /**
     * Validates the configs specified by this instance.
     * 
     * @throws Exception if any configured value is invalid (missing storage folder,
     *         negative politeness delay, or an out-of-range depth/page limit).
     */
    public void validate() throws Exception
    {
        if (crawlStorageFolder == null) {
            throw new Exception("Crawl storage folder is not set in the CrawlConfig.");
        }
        if (politenessDelay < 0) {
            throw new Exception("Invalid value for politeness delay: " + politenessDelay);
        }
        if (maxDepthOfCrawling < -1) {
            throw new Exception("Maximum crawl depth should be either a positive number or -1 for unlimited depth.");
        }
        if (maxDepthOfCrawling > Short.MAX_VALUE) {
            throw new Exception("Maximum value for crawl depth is " + Short.MAX_VALUE);
        }
        // Symmetric with the depth check above: -1 is the only valid negative sentinel.
        if (maxPagesToFetch < -1) {
            throw new Exception("Maximum pages to fetch should be either a positive number or -1 for unlimited pages.");
        }
    }

    /**
     * @return the folder used by the crawler for storing intermediate crawl data.
     */
    public String getCrawlStorageFolder()
    {
        return crawlStorageFolder;
    }

    /**
     * The folder which will be used by crawler for storing the intermediate crawl data. The content
     * of this folder should not be modified manually.
     */
    public void setCrawlStorageFolder(String crawlStorageFolder)
    {
        this.crawlStorageFolder = crawlStorageFolder;
    }

    /**
     * @return whether resumable crawling is enabled.
     */
    public boolean isResumableCrawling()
    {
        return resumableCrawling;
    }

    /**
     * If this feature is enabled, you would be able to resume a previously stopped/crashed crawl.
     * However, it makes crawling slightly slower
     */
    public void setResumableCrawling(boolean resumableCrawling)
    {
        this.resumableCrawling = resumableCrawling;
    }

    /**
     * @return the maximum crawl depth, or -1 for unlimited depth.
     */
    public int getMaxDepthOfCrawling()
    {
        return maxDepthOfCrawling;
    }

    /**
     * Maximum depth of crawling For unlimited depth this parameter should be set to -1
     */
    public void setMaxDepthOfCrawling(int maxDepthOfCrawling)
    {
        this.maxDepthOfCrawling = maxDepthOfCrawling;
    }

    /**
     * @return the maximum number of pages to fetch, or -1 for unlimited pages.
     */
    public int getMaxPagesToFetch()
    {
        return maxPagesToFetch;
    }

    /**
     * Maximum number of pages to fetch For unlimited number of pages, this parameter should be set
     * to -1
     */
    public void setMaxPagesToFetch(int maxPagesToFetch)
    {
        this.maxPagesToFetch = maxPagesToFetch;
    }

    /**
     * @return the user-agent string presented to web servers.
     */
    public String getUserAgentString()
    {
        return userAgentString;
    }

    /**
     * user-agent string that is used for representing your crawler to web servers. See
     * http://en.wikipedia.org/wiki/User_agent for more details
     */
    public void setUserAgentString(String userAgentString)
    {
        this.userAgentString = userAgentString;
    }

    /**
     * @return the politeness delay in milliseconds.
     */
    public int getPolitenessDelay()
    {
        return politenessDelay;
    }

    /**
     * Politeness delay in milliseconds (delay between sending two requests to the same host).
     * 
     * @param politenessDelay the delay in milliseconds.
     */
    public void setPolitenessDelay(int politenessDelay)
    {
        this.politenessDelay = politenessDelay;
    }

    /**
     * @return whether https pages are also crawled.
     */
    public boolean isIncludeHttpsPages()
    {
        return includeHttpsPages;
    }

    /**
     * Should we also crawl https pages?
     */
    public void setIncludeHttpsPages(boolean includeHttpsPages)
    {
        this.includeHttpsPages = includeHttpsPages;
    }

    /**
     * @return whether binary content (images, audio, ...) is fetched.
     */
    public boolean isIncludeBinaryContentInCrawling()
    {
        return includeBinaryContentInCrawling;
    }

    /**
     * Should we fetch binary content such as images, audio, ...?
     */
    public void setIncludeBinaryContentInCrawling(boolean includeBinaryContentInCrawling)
    {
        this.includeBinaryContentInCrawling = includeBinaryContentInCrawling;
    }

    /**
     * @return the maximum number of connections per host.
     */
    public int getMaxConnectionsPerHost()
    {
        return maxConnectionsPerHost;
    }

    /**
     * Maximum Connections per host
     */
    public void setMaxConnectionsPerHost(int maxConnectionsPerHost)
    {
        this.maxConnectionsPerHost = maxConnectionsPerHost;
    }

    /**
     * @return the maximum total number of connections.
     */
    public int getMaxTotalConnections()
    {
        return maxTotalConnections;
    }

    /**
     * Maximum total connections
     */
    public void setMaxTotalConnections(int maxTotalConnections)
    {
        this.maxTotalConnections = maxTotalConnections;
    }

    /**
     * @return the socket timeout in milliseconds.
     */
    public int getSocketTimeout()
    {
        return socketTimeout;
    }

    /**
     * Socket timeout in milliseconds
     */
    public void setSocketTimeout(int socketTimeout)
    {
        this.socketTimeout = socketTimeout;
    }

    /**
     * @return the connection timeout in milliseconds.
     */
    public int getConnectionTimeout()
    {
        return connectionTimeout;
    }

    /**
     * Connection timeout in milliseconds
     */
    public void setConnectionTimeout(int connectionTimeout)
    {
        this.connectionTimeout = connectionTimeout;
    }

    /**
     * @return the maximum number of outgoing links processed from a page.
     */
    public int getMaxOutgoingLinksToFollow()
    {
        return maxOutgoingLinksToFollow;
    }

    /**
     * Max number of outgoing links which are processed from a page
     */
    public void setMaxOutgoingLinksToFollow(int maxOutgoingLinksToFollow)
    {
        this.maxOutgoingLinksToFollow = maxOutgoingLinksToFollow;
    }

    /**
     * @return the maximum allowed page size in bytes.
     */
    public int getMaxDownloadSize()
    {
        return maxDownloadSize;
    }

    /**
     * Max allowed size of a page. Pages larger than this size will not be fetched.
     */
    public void setMaxDownloadSize(int maxDownloadSize)
    {
        this.maxDownloadSize = maxDownloadSize;
    }

    /**
     * @return whether redirects are followed.
     */
    public boolean isFollowRedirects()
    {
        return followRedirects;
    }

    /**
     * Should we follow redirects?
     */
    public void setFollowRedirects(boolean followRedirects)
    {
        this.followRedirects = followRedirects;
    }

    /**
     * @return the proxy host, or {@code null} if no proxy is used.
     */
    public String getProxyHost()
    {
        return proxyHost;
    }

    /**
     * If crawler should run behind a proxy, this parameter can be used for specifying the proxy
     * host.
     */
    public void setProxyHost(String proxyHost)
    {
        this.proxyHost = proxyHost;
    }

    /**
     * @return the proxy port.
     */
    public int getProxyPort()
    {
        return proxyPort;
    }

    /**
     * If crawler should run behind a proxy, this parameter can be used for specifying the proxy
     * port.
     */
    public void setProxyPort(int proxyPort)
    {
        this.proxyPort = proxyPort;
    }

    /**
     * @return the proxy username, or {@code null} if no proxy authentication is used.
     */
    public String getProxyUsername()
    {
        return proxyUsername;
    }

    /**
     * If crawler should run behind a proxy and user/pass is needed for authentication in proxy,
     * this parameter can be used for specifying the username.
     */
    public void setProxyUsername(String proxyUsername)
    {
        this.proxyUsername = proxyUsername;
    }

    /**
     * @return the proxy password, or {@code null} if no proxy authentication is used.
     */
    public String getProxyPassword()
    {
        return proxyPassword;
    }

    /**
     * If crawler should run behind a proxy and user/pass is needed for authentication in proxy,
     * this parameter can be used for specifying the password.
     */
    public void setProxyPassword(String proxyPassword)
    {
        this.proxyPassword = proxyPassword;
    }

    /**
     * @return the authenticationInformations, or {@code null} if none were configured.
     */
    public List<AuthInfo> getAuthInfo()
    {
        return authInfo;
    }

    /**
     * Adds a single authentication entry, lazily creating the backing list.
     */
    public void addAuthInfo(AuthInfo authInfo)
    {
        if (this.authInfo == null) {
            this.authInfo = new ArrayList<AuthInfo>();
        }
        this.authInfo.add(authInfo);
    }

    /**
     * @param authInfo the authenticationInformations to set
     */
    public void setAuthInfo(List<AuthInfo> authInfo)
    {
        this.authInfo = authInfo;
    }

    @Override
    public String toString()
    {
        StringBuilder sb = new StringBuilder();
        sb.append("Crawl storage folder: ").append(getCrawlStorageFolder()).append("\n");
        sb.append("Resumable crawling: ").append(isResumableCrawling()).append("\n");
        sb.append("Max depth of crawl: ").append(getMaxDepthOfCrawling()).append("\n");
        sb.append("Max pages to fetch: ").append(getMaxPagesToFetch()).append("\n");
        sb.append("User agent string: ").append(getUserAgentString()).append("\n");
        sb.append("Politeness delay: ").append(getPolitenessDelay()).append("\n");
        sb.append("Include https pages: ").append(isIncludeHttpsPages()).append("\n");
        sb.append("Include binary content: ").append(isIncludeBinaryContentInCrawling()).append("\n");
        sb.append("Max connections per host: ").append(getMaxConnectionsPerHost()).append("\n");
        sb.append("Max total connections: ").append(getMaxTotalConnections()).append("\n");
        sb.append("Socket timeout: ").append(getSocketTimeout()).append("\n");
        // Fixed: this line previously duplicated "Max total connections" and the
        // connection timeout was never printed.
        sb.append("Connection timeout: ").append(getConnectionTimeout()).append("\n");
        sb.append("Max outgoing links to follow: ").append(getMaxOutgoingLinksToFollow()).append("\n");
        sb.append("Max download size: ").append(getMaxDownloadSize()).append("\n");
        sb.append("Should follow redirects?: ").append(isFollowRedirects()).append("\n");
        // Fixed: trailing newline was missing here, fusing this entry with "Proxy host".
        sb.append("Authentication information: ").append(authInfo).append("\n");
        sb.append("Proxy host: ").append(getProxyHost()).append("\n");
        sb.append("Proxy port: ").append(getProxyPort()).append("\n");
        sb.append("Proxy username: ").append(getProxyUsername()).append("\n");
        // NOTE(review): the proxy password is included in toString() output; avoid
        // logging this object where the password must not appear.
        sb.append("Proxy password: ").append(getProxyPassword()).append("\n");

        return sb.toString();
    }
}
